rm(list = ls())
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)
library(ggplot2)

# Load the master survey sheet into a data.table. `skip = 1` skips the first
# spreadsheet row before reading; `guess_max` lets readxl inspect up to
# 10,000 rows when guessing column types.
m <- data.table(
  read_xlsx("../Data/MasterData.xlsx", skip = 1, guess_max = 1e4)
)

# Age relative to 2021 (presumably `year` is the birth year -- confirm
# against the codebook).
m[, age := 2021 - year]

# Multiply the existing rate/return columns by 100 in place. These updates
# are independent of each other, so they are done in a single `:=` call.
m[, `:=`(
  memrableperiod_pret = memrableperiod_pret * 100,
  memrableperiod_aret = memrableperiod_aret * 100,
  retrecall_1day_rate = retrecall_1day_rate * 100,
  retrecall_30day_rate = retrecall_30day_rate * 100,
  retrecall_1year_rate = retrecall_1year_rate * 100,
  retrecall_5year_rate = retrecall_5year_rate * 100,
  expret30day_market_rate = expret30day_market_rate * 100,
  expret1year_market_rate = expret1year_market_rate * 100,
  expret30day_self_rate = expret30day_self_rate * 100,
  expret1year_self_rate = expret1year_self_rate * 100
)]

# Derived columns, built from the rescaled values above. `ret_lagd1` and
# `ret_lagy1` have not been multiplied by 100 at this point, hence the
# explicit `* 100` on them here.
m[, `:=`(
  bias = memrableperiod_pret - memrableperiod_aret,
  exretrecall_1day_rate = retrecall_1day_rate - ret_lagd1 * 100,
  exretrecall_1year_rate = retrecall_1year_rate - ret_lagy1 * 100
)]


###########################################################################
# Free Recall
###########################################################################

# Rescale the market-return columns by 100 and build the past-month return as
# the sum of the four weekly lags. `ret_lagy1` is overwritten in place, so
# every statement after this one sees it already multiplied by 100.
m[, `:=`(
  ret_intraday2 = ret_intraday2 * 100,
  ret_lagmonth = (ret_lagw1 + ret_lagw2 + ret_lagw3 + ret_lagw4) * 100,
  ret_lagy1 = ret_lagy1 * 100
)]

# Recalled 30-day return minus the past-month return computed just above.
m[, exretrecall_30day_rate := retrecall_30day_rate - ret_lagmonth]

# m[, fraction_up := word_count_up / (word_count_up + word_count_down + word_count_neutral)]
# m[type == 0] 


## Table 4
# Parse the memorable-period bounds into zoo yearmon values. The "%Ym%m"
# format expects a four-digit year, a literal "m", then a two-digit month.
m[, memrableperiod_begin := as.yearmon(memrableperiod_begin, "%Ym%m")]
m[, memrableperiod_end := as.yearmon(memrableperiod_end, "%Ym%m")]

# Latest period end in the data, stored as a constant column. `na.rm = TRUE`
# keeps one missing or unparsable end month from turning the maximum into NA,
# which would silently empty every filter that compares against it.
m[, memrableperiod_max := max(memrableperiod_end, na.rm = TRUE)]

# Gap between Mar 2022 (yearmon encodes a month as year + (month - 1) / 12)
# and the midpoint of the memorable period.
m[, memrableperiod_dist := as.yearmon(2022 + 2 / 12) - (memrableperiod_begin + memrableperiod_end) / 2]

# m[type == 0 & memrableperiod_end >= memrableperiod_max - 5, .(mean(memrableperiod_end - memrableperiod_begin))]
# m[type == 0 & memrableperiod_end < memrableperiod_max - 5, .(mean(memrableperiod_end - memrableperiod_begin))]

# Keep type-0 rows whose memorable period ends no later than Dec 2020 (at
# least one year before the end of sample). The cutoff is written in numeric
# "%Y-%m" form so that it parses the same way regardless of the session
# locale; a month-name string such as "Dec 2020" only parses when the locale
# uses English month names.
sample <- m[type == 0 & memrableperiod_end <= as.yearmon("2020-12")]

# Drop rows with a missing outcome or missing today / past-month return.
sample <- sample[!is.na(ret_intraday2) & !is.na(ret_lagmonth) & !is.na(memrableperiod_pret)]

# Table 4, full-sample regressions of memrableperiod_pret on market returns.
# Each felm formula has four parts separated by `|`:
#   regressors | factors to project out | IV spec (0 = none) | cluster (date)

# Today's market return only.
f1 <- felm(
  memrableperiod_pret ~ ret_intraday2 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat |
    0 | date,
  data = sample
)

# Past-month market return only.
f2 <- felm(
  memrableperiod_pret ~ ret_lagmonth |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat |
    0 | date,
  data = sample
)

# Past-year market return only.
f3 <- felm(
  memrableperiod_pret ~ ret_lagy1 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat |
    0 | date,
  data = sample
)

# All three horizons jointly.
f4 <- felm(
  memrableperiod_pret ~ ret_intraday2 + ret_lagmonth + ret_lagy1 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat |
    0 | date,
  data = sample
)


# 5 years
# Type-0 rows whose memorable period ends within five years of the latest
# period end in the data and no later than Dec 2020. The cutoff uses the
# numeric "%Y-%m" form so it parses identically in every session locale
# (a month-name string like "Dec 2020" needs English month names).
sample <- m[type == 0 & memrableperiod_end >= memrableperiod_max - 5 &
  memrableperiod_end <= as.yearmon("2020-12")]

# Drop rows with a missing outcome or missing today / past-month return.
sample <- sample[!is.na(ret_intraday2) & !is.na(ret_lagmonth) & !is.na(memrableperiod_pret)]

# Same four specifications as f1-f4, estimated on the 5-year subsample.
# felm formula parts: regressors | factors to project out | no IV (0) |
# cluster variable (date).
f5 <- felm(memrableperiod_pret ~ ret_intraday2 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat | 0 | date, sample)

f6 <- felm(memrableperiod_pret ~ ret_lagmonth | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat | 0 | date, sample)

f7 <- felm(memrableperiod_pret ~ ret_lagy1 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat | 0 | date, sample)

f8 <- felm(memrableperiod_pret ~ ret_intraday2 + ret_lagmonth + ret_lagy1 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat | 0 | date, sample)

# Print the regression table for f1-f8 (f5-f8 are the 5-year subsample fits).
# The three covariate labels are applied to the regressors in the order
# they first appear across the models.
stargazer(
  f1, f2, f3, f4, f5, f6, f7, f8,
  title = "",
  covariate.labels = c(
    "MktRet\\textsubscript{today}",
    "MktRet\\textsubscript{1M}",
    "MktRet\\textsubscript{1Y}"
  ),
  dep.var.labels.include = TRUE,
  omit.stat = c("LL", "ser", "F", "rsq"),
  ord.intercepts = FALSE,
  digits = 2,
  align = TRUE,
  no.space = TRUE,
  single.row = FALSE,
  column.sep.width = "0pt"
)


# Reply R3
# All treatments, control for type
# Rows with non-missing type >= 0 whose memorable period ends no later than
# Dec 2020 (at least one year before the end of sample). The cutoff uses the
# numeric "%Y-%m" form so it parses identically in every session locale
# (a month-name string like "Dec 2020" needs English month names).
sample <- m[type >= 0 & memrableperiod_end <= as.yearmon("2020-12")]

# Drop rows with a missing outcome or missing today / past-month return.
sample <- sample[!is.na(ret_intraday2) & !is.na(ret_lagmonth) & !is.na(memrableperiod_pret)]

# felm formula parts: regressors | factors to project out (now including
# `type`) | no IV (0) | cluster variable (date).
f1 <- felm(memrableperiod_pret ~ ret_intraday2 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

f2 <- felm(memrableperiod_pret ~ ret_lagmonth | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

# NOTE(review): unlike the other single-horizon past-year specifications in
# this script, this one also includes ret_lagmonth -- confirm this is intended.
f3 <- felm(memrableperiod_pret ~ ret_lagy1 + ret_lagmonth | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

f4 <- felm(memrableperiod_pret ~ ret_intraday2 + ret_lagmonth + ret_lagy1 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

# The models contain three distinct regressors (today, past month, past
# year), so three labels are supplied; with only two, the past-year row
# would be left without a readable label.
stargazer(f1, f2, f3, f4,
  align = TRUE, dep.var.labels.include = TRUE,
  covariate.labels = c("Market return, today", "Market return, past month", "Market return, past year"),
  omit.stat = c("LL", "ser", "F", "rsq"), ord.intercepts = FALSE, no.space = TRUE,
  title = "",
  single.row = FALSE, column.sep.width = "0pt", digits = 2
)



# 5 years
# Rows with non-missing type >= 0 whose memorable period ends within five
# years of the latest period end in the data and no later than Dec 2020. The
# cutoff uses the numeric "%Y-%m" form so it parses identically in every
# session locale (a month-name string like "Dec 2020" needs English month
# names).
sample <- m[type >= 0 & memrableperiod_end >= memrableperiod_max - 5 &
  memrableperiod_end <= as.yearmon("2020-12")]

# Drop rows with a missing outcome or missing today / past-month return.
sample <- sample[!is.na(ret_intraday2) & !is.na(ret_lagmonth) & !is.na(memrableperiod_pret)]

# felm formula parts: regressors | factors to project out (including `type`)
# | no IV (0) | cluster variable (date).
f5 <- felm(memrableperiod_pret ~ ret_intraday2 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

f6 <- felm(memrableperiod_pret ~ ret_lagmonth | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

f7 <- felm(memrableperiod_pret ~ ret_lagy1 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

f8 <- felm(memrableperiod_pret ~ ret_intraday2 + ret_lagmonth + ret_lagy1 | age + gender + education + total_wealth + total_income +
  accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type | 0 | date, sample)

# Report only the today-only and the joint specifications: f1/f4 are the
# fits already in the workspace under those names, f5/f8 the 5-year
# subsample fits estimated just above.
stargazer(f1, f4, f5, f8,
  align = TRUE, dep.var.labels.include = TRUE,
  covariate.labels = c("MktRet\\textsubscript{today}", "MktRet\\textsubscript{1M}", "MktRet\\textsubscript{1Y}"),
  omit.stat = c("LL", "ser", "F", "rsq"), ord.intercepts = FALSE, no.space = TRUE,
  title = "",
  single.row = FALSE, column.sep.width = "0pt", digits = 2
)
